# ------------------------------------------------------------------------------#
# Description: Create Table A.1: Correlation of explanatory variables with adoption
# Project:     Peer Effects in Voluntary Environmental Policies:
#              An Application to Urban Water Quality
# Author:      Daniel A. Brent | dab320@psu.edu
# Created:     2025-06-11
# ------------------------------------------------------------------------------#

# Clear environment ------------------------------------------------------------
rm(list = ls())
gc()

# Load required packages -------------------------------------------------------
library(data.table)
library(fixest)

options(scipen = 999)

# Load and process adoption data -----------------------------------------------
load("data/adoptions/adoptions.RData")
adopt[, adopted := max(adopt.any, na.rm = TRUE), by = pin]

# Merge peer eligibility -------------------------------------------------------
load("data/bucket/peer_elig_buckets_cs.RData")
dat <- merge(adopt, peer.buckets.cs, by = c("pin", "year"))
rm(peer.buckets.cs)

load("data/bucket/peer_elig_buckets_rg.RData")
dat <- merge(dat, peer.buckets.rg, by = c("pin", "year"))
rm(peer.buckets.rg)

# Create average eligibility ---------------------------------------------------
dat[, elig.peers.avg.100 := (elig.peers.cs.100 + elig.peers.rg.100) / 2]
dat[, elig.peers.avg.20 := (elig.peers.cs.20 + elig.peers.rg.20) / 2]

# Merge total peers ------------------------------------------------------------
load("data/bucket/total_peers.RData")
dat <- merge(dat, total.peers, by = "pin")
rm(total.peers)

# Merge parcel characteristics -------------------------------------------------
load("data/parcel_build/parcel_build.RData")
dat <- merge(dat, parcel.build[, .(pin, zip5, sub.area, sqft, lot, beds, baths,
                                   yearbuilt, val.land, val.improve,
                                   ren.pre90, ren.90.99, ren.00.09, ren.10,
                                   water.prob)], by = "pin")
rm(parcel.build)

# Merge census characteristics -------------------------------------------------
load("data/census/all_parcels_census_2010.RData")
parcels.2010[, blk := as.numeric(paste0(block, blkgrp))]
dat <- merge(dat, parcels.2010[, .(pin, tract, blkgrp, block, blk)])
rm(parcels.2010)

# Create flags and filter ------------------------------------------------------
dat[, assess.value := val.land + val.improve]
dat[, max.elig.cs := max(elig.cs), by = pin]
dat[, max.elig.rg := max(elig.rg), by = pin]

bal <- dat[year == 2020, .(pin, elig.cs, elig.rg, max.elig.cs, max.elig.rg, adopted,
                           elig.peers.cs.100, elig.peers.cs.20,
                           elig.peers.rg.100, elig.peers.rg.20,
                           elig.peers.avg.100, elig.peers.avg.20,
                           sub.area, zip5, tract, blkgrp, blk,
                           sqft, lot, beds, baths, yearbuilt, assess.value,
                           ren.10, ren.00.09)]

# Rescale lot size and assessed value ------------------------------------------
bal[, lot1000 := lot / 1000]
bal[, assess1000 := assess.value / 1000]

# Merge census block group data ------------------------------------------------
load("data/census/census_bg.RData")
census.2014[, blkgrp := as.numeric(paste0(tract, bg))]
census.2014[, active.trans := pub.trans + bike + walk]
census.2014[, degree := college + adv.degree]

bal <- merge(bal, census.2014[, .(blkgrp, med.inc, non.white, non.white.asian,
                                  active.trans, degree, own.occ, gov.assist)],
             by = "blkgrp")
rm(census.2014)

# Standardize explanatory variables --------------------------------------------
bal[, adopted.100 := adopted * 100]
bal[, sqft.sd := (sqft - mean(sqft)) / sd(sqft)]
bal[, lot.sd := (lot - mean(lot)) / sd(lot)]
bal[, beds.sd := (beds - mean(beds)) / sd(beds)]
bal[, baths.sd := (baths - mean(baths)) / sd(baths)]
bal[, yearbuilt.sd := (yearbuilt - mean(yearbuilt)) / sd(yearbuilt)]
bal[, assess.sd := (assess.value - mean(assess.value)) / sd(assess.value)]
bal[, med.inc.sd := (med.inc - mean(med.inc)) / sd(med.inc)]
bal[, non.white.sd := (non.white - mean(non.white)) / sd(non.white)]
bal[, active.trans.sd := (active.trans - mean(active.trans)) / sd(active.trans)]
bal[, degree.sd := (degree - mean(degree)) / sd(degree)]
bal[, own.occ.sd := (own.occ - mean(own.occ)) / sd(own.occ)]
bal[, gov.assist.sd := (gov.assist - mean(gov.assist)) / sd(gov.assist)]

# Run models -------------------------------------------------------------------
m1 <- feols(adopted ~ sqft.sd + lot.sd + beds.sd + baths.sd + yearbuilt.sd + assess.sd |
              blkgrp, cluster = "blkgrp", data = bal[max.elig.cs == 1])

m2 <- feols(adopted ~ sqft.sd + lot.sd + beds.sd + baths.sd + yearbuilt.sd + assess.sd |
              blkgrp, cluster = "blkgrp", data = bal[max.elig.rg == 1])

m3 <- feols(adopted ~ med.inc.sd + non.white.sd + active.trans.sd + degree.sd +
              own.occ.sd + gov.assist.sd | tract, cluster = "tract", data = bal[max.elig.cs == 1])

m4 <- feols(adopted ~ med.inc.sd + non.white.sd + active.trans.sd + degree.sd +
              own.occ.sd + gov.assist.sd | tract, cluster = "tract", data = bal[max.elig.rg == 1])

m5 <- feols(adopted ~ sqft.sd + lot.sd + beds.sd + baths.sd + yearbuilt.sd + assess.sd +
              med.inc.sd + non.white.sd + active.trans.sd + degree.sd +
              own.occ.sd + gov.assist.sd | tract, cluster = "tract", data = bal[max.elig.cs == 1])

m6 <- feols(adopted ~ sqft.sd + lot.sd + beds.sd + baths.sd + yearbuilt.sd + assess.sd +
              med.inc.sd + non.white.sd + active.trans.sd + degree.sd +
              own.occ.sd + gov.assist.sd | tract, cluster = "tract", data = bal[max.elig.rg == 1])

# Set variable name mapping ----------------------------------------------------
varnames <- c(
  'med.inc.sd' = 'Median Income',
  'non.white.sd' = 'Non-white',
  'active.trans.sd' = 'Active Trans.',
  'degree.sd' = 'Degree',
  'own.occ.sd' = 'Owner Occupied',
  'gov.assist.sd' = 'Gov. Assistance',
  'lot.sd' = 'Lot',
  'sqft.sd' = 'Sq. Ft.',
  'beds.sd' = 'Bedrooms',
  'baths.sd' = 'Bathrooms',
  'yearbuilt.sd' = 'Year Built',
  'assess.sd' = 'Assessed Value'
)

# Preview table ----------------------------------------------------------------
etable(m1, m2, m3, m4, m5, m6,
       fixef_sizes = TRUE,
       depvar = FALSE,
       se.below = TRUE,
       dict = varnames,
       extralines = list(
         '-Eligibility' = c('Cistern Only', 'Cistern &',
                            'Cistern Only', 'Cistern &',
                            'Cistern Only', 'Cistern &'),
         '- ' = c('', 'Rain Garden', '', 'Rain Garden', '', 'Rain Garden')
       ))

# Save final table -------------------------------------------------------------
etable(m1, m2, m3, m4, m5, m6,
       fixef_sizes = TRUE,
       depvar = FALSE,
       se.below = TRUE,
       dict = varnames,
       signif.code = c('*' = 0.1, '**' = 0.05, '***' = 0.01),
       digits = "r4",
       fitstat = c("n"),
       headers = c("Property", "Property", "Census", "Census", "Both", "Both"),
       extralines = list(
         "_Sample" = c("Cistern Only", "Cistern &",
                       "Cistern Only", "Cistern &",
                       "Cistern Only", "Cistern &"),
         "_ " = c("", "Rain Garden", "", "Rain Garden", "", "Rain Garden")
       ),
       file = "output/tables/table_a1.tex",
       replace = TRUE)
